## Hartzell, Hoddie, and Bauer 2010 IO

library(foreign)
library(Amelia)

## Load original dataset
## Read the Stata replication file and take a first look at its contents.
hhb2010 <- read.dta(file = "HHB2010 IO Rep Data.dta")
head(hhb2010)
dim(hhb2010)

## Discard the first row of the file.
## NOTE(review): the reason for dropping row 1 is not documented -- confirm.
hhb2010 <- hhb2010[-1, ]

## Store region as plain numbers.
## NOTE(review): read.dta() turns value-labelled Stata variables into factors by
## default, so this presumably keeps the integer codes of a factor -- confirm.
region_codes <- as.numeric(hhb2010$region)
hhb2010$region <- region_codes

## Columns removed before screening, grouped by the reason for removal.
drop_groups <- list(
  ## ID vars
  id = c("country", "casename", "cmark", "waryrs", "cname"),
  ## Derived dummy vars
  dummy = c("western", "eeurop", "lamerica", "ssafrica", "asia", "nafrme"),
  ## Calculated parameters
  calculated = c("econgrowAVG", "lagecgrowAVG", "lagreservAVG",
                 "sdwars", "sdonset", "sdwarl"),
  ## Imputed vars
  imputed = c("econgrowIMPUTE", "lagecgrowIMPUTE", "lagreservIMPUTE")
)

## Remove each column by exact name; a name that is not present is a no-op.
for (column in unlist(drop_groups, use.names = FALSE)) {
  hhb2010[[column]] <- NULL
}

## How many variables? 113: reduction necessary
dim(hhb2010)

## Which variables are in analysis and have missing data?
##
## Percentage of missing values for every variable used in the analysis, printed
## as one labelled vector.  The trailing "N" / "??N" marks are the hand-made
## annotations of the original screening; presumably "N" flags a variable
## without missing values (the unmarked variables are exactly the ones collected
## in `analysis`) -- TODO confirm.
model_vars <- c(
  "onset",        ## N
  "signed",
  "gdpenl",
  "lagecgrow",
  "lagreserv",
  "polity2",
  "polsq",
  "lpopl1",       ## ??N
  "Oil",          ## N
  "prevcivwar",   ## N
  "lagsumun",
  "lmtnest",      ## N
  "decadeone",    ## N
  "decadetwo",    ## N
  "decadethree",  ## N
  "lagnunder",
  "tradeopen",
  "ccode",        ## ??N
  "year"          ## N
)

## `$` on an absent or misspelled column yields NULL, and sum(is.na(NULL)) is 0,
## which would be indistinguishable from "no missing data".  Report such names
## as NA and warn instead.
absent_vars <- setdiff(model_vars, names(hhb2010))
if (length(absent_vars) > 0) {
  warning("not found in hhb2010 (reported as NA): ",
          paste(absent_vars, collapse = ", "), call. = FALSE)
}

pct_missing <- vapply(
  model_vars,
  function(v) {
    if (v %in% absent_vars) {
      NA_real_
    } else {
      sum(is.na(hhb2010[[v]])) / nrow(hhb2010) * 100
    }
  },
  numeric(1)
)
pct_missing

## Analysis variables that contain missing values.
analysis_vars <- c("signed", "gdpenl", "lagecgrow", "lagreserv", "polity2",
                   "polsq", "lagsumun", "tradeopen", "lagnunder")

## Selecting by name fails immediately ("undefined columns selected") when a
## variable is absent, instead of silently producing a narrower, shifted table.
analysis_cols <- hhb2010[analysis_vars]

## `analysis`: the values themselves, coerced to numeric for cor().
## Columns carry the variable names so correlation output is labelled.
analysis <- as.data.frame(lapply(analysis_cols, as.numeric))

## `missing`: one 0/1 indicator per analysis variable (1 = observed, 0 = NA).
missing <- as.data.frame(
  lapply(analysis_cols, function(col) as.integer(!is.na(col)))
)

dim(analysis)
dim(missing)

## Standard deviation of each indicator; 0 means the variable has no NA (or
## only NA), in which case correlations with that indicator are undefined.
vapply(missing, sd, numeric(1))

## Remove analysis variables
## hhb2010$signed <- hhb2010$gdpenl <- hhb2010$lagecgrow <- hhb2010$lagreserv <- hhb2010$polity2 <- hhb2010$polsq <- hhb2010$lagsumun <- hhb2010$tradeopen <- NULL
## head(hhb2010)
## var(hhb2010)

## Check correlations and missing values
##
## Screen the remaining auxiliary variables one at a time.  For each candidate
## the script prints
##   (1) its pairwise-complete correlation with every column of `analysis`,
##   (2) its pairwise-complete correlation with every column of `missing`,
##   (3) its own percentage of missing values,
## and then either keeps the candidate in hhb2010 ("Y") or drops it ("N").
## The Y/N flags are the decisions recorded by hand during the original
## screening; they are not recomputed here.
##
## `wars` used to be screened a second time after it had already been dropped,
## which handed NULL to cor() and stopped the script with
## "'x' must be numeric".  Every candidate is now listed exactly once.
screening <- c(
  wars = "N", war = "N", warl = "N", ethonset = "N",
  durest = "N", aim = "N", ended = "N", ethwar = "N",
  pop = "N", lpop = "Y", gdpen = "Y", gdptype = "N",
  lgdpenl1 = "Y", region = "Y", colbrit = "N", colfra = "N",
  mtnest = "N", elevdiff = "N", ncontig = "N", ethfrac = "N",
  ef = "N", plural = "N", second = "N", numlang = "N",
  relfrac = "N", plurrel = "N", minrelpc = "N", muslim = "N",
  nwstate = "N", polity2l = "Y", instab = "N", anocl = "Y",
  deml = "Y", empethfrac = "N", empwarl = "N", emponset = "N",
  empgdpenl = "Y", emplpopl = "Y", emplmtnest = "N", empncontig = "N",
  empolity2l = "Y", colwars = "N", colonset = "N", cowwars = "N",
  cowonset = "N", cowwarl = "N", colwarl = "N", deadgrouped = "N",
  onset2 = "N", Inv = "Y", totdebtserv = "N", sumunder = "N",
  lagunder = "N", laginv = "Y", lagpol2 = "Y", lagdebtserv = "N",
  lagwardead = "N", banksWCI = "N", lagWCI = "N", pcw = "N",
  wardead = "N", anydead = "N", var101 = "Y", peaceyrs = "N",
  peaceyrs2 = "N", psumun1 = "N", psumun2 = "N", psumun3 = "N",
  pspline1 = "N", pspline2 = "N", pspline3 = "N", surfacearea = "N",
  lanydead = "N", outlier = "N", efunder = "N", efsigned = "Y",
  elecl = "N", quintone = "Y", under = "N", quinttwo = "Y",
  quintthree = "N", quintfour = "N", quintfive = "N", quintsix = "N",
  fifteenone = "Y", fifteentwo = "Y", futecgrow = "N", polglob = "Y",
  lagpolglob = "N", nunder = "Y"
)

## Guard against a candidate being listed twice again.
stopifnot(anyDuplicated(names(screening)) == 0L)

for (candidate in names(screening)) {
  keep <- screening[[candidate]] == "Y"
  ## Exact-name lookup: unlike `$`, `[[` never falls back to partial matching.
  values <- hhb2010[[candidate]]

  cat("\n## ", candidate, if (keep) " [Y: kept]" else " [N: dropped]", "\n",
      sep = "")

  if (is.numeric(values) || is.logical(values)) {
    ## print() is required because nothing auto-prints inside a loop.
    print(round(cor(values, analysis, use = "pairwise.complete.obs"), 2))
    print(round(cor(values, missing, use = "pairwise.complete.obs"), 2))
    print(sum(is.na(values)) / nrow(hhb2010) * 100)
  } else {
    ## Absent (NULL) or non-numeric column: cor() would stop here, so skip the
    ## diagnostics but still apply the recorded decision below.
    warning("screening diagnostics skipped for '", candidate,
            "': column is absent or not numeric", call. = FALSE)
  }

  if (!keep) {
    ## Removing a column that is not present is a no-op.
    hhb2010[[candidate]] <- NULL
  }
}

## Imputation
## Show the first rows and the dimensions of hhb2010 as it stands after the
## screening above; this is the data frame handed to amelia() below.
head(hhb2010)
dim(hhb2010)

## What is average percentage of missing data?
## Count the NA entries in each column of `x`.
##
## x: a data frame or matrix.
## Returns an unnamed integer vector with one count per column, in column order.
NAs <- function(x) {
  na_flags <- is.na(x)
  as.integer(colSums(na_flags))
}
## NA count per remaining column, computed once and reused below.
na_counts <- NAs(hhb2010)
na_counts

## Average share of missing values across columns, in percent.
mean(na_counts / nrow(hhb2010)) * 100

## Thus: 6 imputations

## Note: onset, signed already lagged

## Fix the random seed so the run is repeatable.  R has no octal literals, so
## 02138 is read as the number 2138.
set.seed(02138)

## Value passed as amelia()'s `empri` argument: 1% of the number of rows.
ridge_prior <- 0.01 * nrow(hhb2010)

## Run Amelia with `year` as the time index and `ccode` as the cross-section
## index, requesting polytime = 3.
hhb2010.out <- amelia(
  hhb2010,
  m = 6,
  ts = "year",
  cs = "ccode",
  polytime = 3,
  empri = ridge_prior
)

## Write the imputations in Stata format; separate = FALSE asks for a single
## output file rather than one file per imputation.
write.amelia(
  obj = hhb2010.out,
  file.stem = "HHB2010 IO Imp Data",
  format = "dta",
  separate = FALSE
)
